*** Created on: Jan 2020
*** Main file to construct various markup measures
* Close any log that is still open (ignored when none is), then start a fresh text log for this run.
capture log close
log using "/projects/programs/jpe_revision/disclosure_feb_2020", replace text

********************************************************************************
********************************************************************************
***  Define Programs 
********************************************************************************
********************************************************************************
*** Count number of observations and firms
	* Usage:   countN, sample(indicator)
	* sample() names the 0/1 indicator; only observations with indicator==1 are counted.
	* Displays one line with: number of observations, number of distinct
	* naics-firmid pairs, and number of distinct firmid values.
	* The data in memory are left as they were (everything runs under preserve/restore).
	capture program drop countN
	program define countN
	syntax, sample(string)
	version 13
	* scratch variables get unique temporary names
	tempvar firm_grp pair_grp
	preserve
	quietly {
		keep if `sample'==1

		count
		local N_obs = r(N)						//number of observations

		egen `firm_grp' = group(firmid)
		summarize `firm_grp'
		local N_firm = r(max)						//highest group id = number of firms

		egen `pair_grp' = group(naics firmid)
		summarize `pair_grp'
		local N_firm_ind = r(max)					//highest group id = number of industry-firm pairs
	}
	restore
	disp "`sample' " "#obs: " `N_obs' " #firm_ind_id:" `N_firm_ind' " #firmid:" `N_firm' 

	end

*** Winsorize variables
	* Usage:   XJ_winsor, varlist(vars) cell(byvars) pl(#) pu(#)
	*   varlist()  numeric variables to winsorize
	*   cell()     variables defining the cells within which percentiles are taken
	*   pl() pu()  lower and upper percentile cutoffs, integers with pl < pu
	* For each variable x the untouched values are kept as nw_x, and x is recreated
	* with values below the pl-th within-cell percentile raised to it and values
	* above the pu-th within-cell percentile lowered to it.
	* Missing values, including extended missings, are never modified.
	* x keeps the storage type of the original, so double or long inputs are not
	* rounded to float.
	* Side effect: the data end up sorted by cell().
	cap program drop XJ_winsor
	program define XJ_winsor
	syntax, varlist(string) cell(string) pl(integer) pu(integer)
	version 13
	* reject cutoffs that would collapse or invert the retained range
	if `pl' >= `pu' {
		display as error "XJ_winsor: pl() must be smaller than pu()"
		exit 198
	}
	quietly {
	foreach var in `varlist' {
		* uniquely named scratch variables: no variable of the caller is dropped
		tempvar lo hi
		rename `var' nw_`var'
		* percentiles held in double so the cutoffs are not rounded
		by `cell', sort: egen double `lo' = pctile(nw_`var'), p(`pl')
		by `cell', sort: egen double `hi' = pctile(nw_`var'), p(`pu')
		* recreate the variable with the storage type of the original
		local vtype : type nw_`var'
		gen `vtype' `var' = nw_`var'
		* missing values sort above every number, so they must be excluded explicitly
		replace `var' = `lo' if nw_`var'<`lo' & !missing(nw_`var')
		replace `var' = `hi' if nw_`var'>`hi' & !missing(nw_`var')
		drop `lo' `hi'
	}
	}
	end

********************************************************************************
********************************************************************************
***  I. CONSTRUCT SAMPLES
********************************************************************************
********************************************************************************
* Start from an empty memory, move to the project folder (relative output paths resolve here), and load the estimation data.
clear
cd "/projects/programs/jpe_revision"
use "/projects/data/markup_estimation.dta", clear

*** Construct markup
generate l0 = sw0
capture drop mp_*
generate mp_cshare = al*tvs/(l0)						//cost-share w/ total revenue

	** Winsorize markup within year, once at the 5th/95th and once at the 1st/99th percentile
	* First call: the raw markup moves to nw_mp_cshare and mp_cshare becomes the 5/95 version.
	XJ_winsor, varlist(mp_cshare)    cell(year) pl(5) pu(95)
	* Second call works on the raw copy: XJ_winsor renames it to nw_nw_mp_cshare and
	* creates a new nw_mp_cshare holding the 1/99 version, which then gets its final name.
	XJ_winsor, varlist(nw_mp_cshare) cell(year) pl(1) pu(99)
	rename nw_mp_cshare mp_cshare_w1

*** Aggregate to firm level
* One observation per year-naics-firmid.
* Totals use (rawsum): with aweights a plain (sum) returns the sum of the
* weight-normalised values, which is not the firm total. (rawsum) ignores the weight.
* Only the two markup means are weighted by l0.
* NOTE(review): this changes tvs, va1, sw, sw_nl and l0 for firms with more than one
* underlying observation - confirm that plain totals are what was intended.
gcollapse (rawsum) tvs va1 sw sw_nl l0 (mean) mp_cshare mp_cshare_w1 [aweight=l0], by (year naics firmid) 

*** SAMPLE 1 (Baseline sample): every observation
generate sample1 = 1

	** SAMPLE 1_YEAR: one indicator per year, every fifth year from 1972 to 2012
	forvalues yr = 1972(5)2012 {
		generate sample1_`yr' = (year==`yr')
	}

	** SAMPLE 1_BIN: Bins of sales share
	* sales share = own tvs over the total tvs of the naics-year cell, in logs
	egen ind_tvs = total(tvs), by(naics year)
	generate ln_mkt_sh = log(tvs/ind_tvs)
	* ten quantile bins of the log share, computed over all observations at once
	xtile bin_ln_mkt_sh = ln_mkt_sh, nquantiles(10)
	tabulate bin_ln_mkt_sh

	* one indicator per bin; observations with a missing bin get 0 everywhere
	forvalues b = 1/10 {
		generate sample1_bin`b' = (bin_ln_mkt_sh==`b')
	}
	
*** SAMPLE 1A (markup >1)
gen sample1a = 0

	* log(1-1/mp_cshare) is undefined for 0 < mp_cshare <= 1, so lhs_cshare is missing
	* there, and also when mp_cshare is 0 or missing.
	* NOTE(review): a negative mp_cshare gives a non-missing lhs_cshare and would pass
	* this filter - confirm that markups are never negative.
	gen lhs_cshare = 1/mp_cshare + log(1-1/mp_cshare)
	replace sample1a = 1 if lhs_cshare!=. 
	* exclude industry-year cells with less than the minimum # of observations required
	* (this line used to start with three slashes, which is a line-continuation marker
	* and glued the next command onto the comment line; a star comment is used instead)
	cap drop temp*
	* temp1 = number of sample1a observations in the naics-year cell
	egen temp1 = sum(sample1a), by(naics year)
	* NOTE(review): the four hash signs below are not valid Stata and look like a
	* withheld threshold - put the actual minimum back before running.
	replace sample1a = 0 if temp1<=####
	
	
*** SAMPLE 2A (markup >1 and firms appearing at least the minimum # of times)
* firm_ind_id identifies a naics-firmid pair
egen firm_ind_id=group(naics firmid)

	* identify industry-year cells that contain less than the minimum number of firms required after dropping firms that showed up less than the minimum # of times required in the sample
	* (this line used to start with three slashes, which is a line-continuation marker
	* and glued the preserve command onto the comment line; a star comment is used instead)
	* NOTE(review): the four hash signs used as thresholds in this section are not valid
	* Stata and look like withheld values - put the actual minimums back before running.
	preserve
	keep if sample1a==1
	* Dropping firms can push a cell below its minimum and dropping cells can push a firm
	* below its minimum, so both filters are applied alternately, five times.
	* NOTE(review): five passes are fixed; nothing checks that the pruning has converged.
	forval i = 1(1)5 {
		cap drop temp*
		* temp1 = observations per firm_ind_id among those still kept
		egen temp1 = sum(sample1a), by(firm_ind_id)
		drop if temp1<=####
		* temp2 = observations per naics-year cell among those still kept
		egen temp2 = sum(sample1a), by(naics year)
		drop if temp2<=####
	}
	* whatever survives the pruning is sample 2A
	gen sample2a = 1 
	keep year firmid naics firm_ind_id sample2a 
	tempfile sample2a
	save `sample2a', replace 
	restore
	
	* Second pass: apply the firm filter once, then flag the observations that pass it
	* but are not in sample 2A, that is, those removed by the repeated pruning above.
	preserve
	keep if sample1a==1
	cap drop temp*
	egen temp1 = sum(sample1a), by(firm_ind_id)
	drop if temp1<=####
	keep year firmid naics firm_ind_id
	
	merge 1:1 year firm_ind_id using `sample2a'
	* _merge==1 means present here but absent from sample 2A
	gen tag = _merge==1 
	drop _merge 
	
	tempfile temptag 
	save `temptag', replace 
	restore 

* Bring sample2a and tag back to the full data. Observations that failed the single
* firm filter are not in the tag file, so tag and sample2a stay missing for them.
* NOTE(review): the _merge variable created here is never dropped.
merge 1:1 year firm_ind_id using `temptag'
replace sample1a = 0 if tag==1			//update sample1a
gen sample1b = (sample1a==0)			//implicit sample 

	** SAMPLE 1A_NAICS3                     //all sector numbers shown below were released in the associated output
	* naics3 drops the last three digits of naics
	* NOTE(review): this yields a 3-digit sector only if naics is a 6-digit number - confirm.
	local sectors 311/316 321/327 331/337 339
	generate naics3 = floor(naics/1000)

	foreach s of numlist `sectors' {
		generate sample1a_`s' = (naics3==`s' & sample1a==1)
	}

* sample2a is missing for every observation that never entered sample 2A; code those as 0
replace sample2a = 0 if sample2a==.
* sample 2B: in sample 1A but not in sample 2A
generate sample2b = (sample2a==0 & sample1a==1)	//implicit sample

	*** SAMPLE 2A_NAICS
	foreach s of numlist `sectors' {
		generate sample2a_`s' = (naics3==`s' & sample2a==1)
		generate sample2b_`s' = (naics3==`s' & sample2b==1)	//implicit sample
	}
	/*
	*** sample counts 
	countN, sample(sample1)
	foreach year of numlist 1972(5)2012 {
		countN, sample(sample1_`year')
	}
	foreach i of numlist 1(1)10 {
		countN, sample(sample1_bin`i')
	}
	countN, sample(sample1a)
	countN, sample(sample1b)
	foreach sector of numlist 311/316 321/327 331/337 339{
		countN, sample(sample1a_`sector')
	}
	countN, sample(sample2a)
	countN, sample(sample2b)
	foreach sector of numlist 311/316 321/327 331/337 339{
		countN, sample(sample2a_`sector')
	}
	foreach sector of numlist 311/316 321/327 331/337 339{
		countN, sample(sample2b_`sector')
	}
	*/
********************************************************************************
********************************************************************************
***  II. ANALYSIS
********************************************************************************
********************************************************************************
*** Aggregate markup
* yearly mean of mp_cshare: weighted by tvs, weighted by l0, and unweighted
tabstat mp_cshare [aweight=tvs] if sample1==1, by(year) statistics(mean)
tabstat mp_cshare [aweight=l0]  if sample1==1, by(year) statistics(mean)
tabstat mp_cshare               if sample1==1, by(year) statistics(mean)

*** Baseline regressions
* the two sets of absorbed fixed effects used throughout this section
local fe_cell "i.naics#i.year"
local fe_firm "i.naics#i.year firm_ind_id"

generate lntvs = log(tvs)
*reghdfe lhs_cshare lntvs if sample1a==1, absorb(i.year)
reghdfe lhs_cshare lntvs if sample1a==1, absorb(`fe_cell')
reghdfe lhs_cshare lntvs if sample2a==1, absorb(`fe_firm')


	** Robustness: markup winsorized at 1 percentile --> DIFFERENT SAMPLE!
	generate lhs_cshare_w1 = 1/mp_cshare_w1 + log(1-1/mp_cshare_w1)
	* wherever exactly one of the two measures is missing, copy the baseline value
	* (missing or not) so that both are defined on the same observations
	replace lhs_cshare_w1 = lhs_cshare if (lhs_cshare==.) != (lhs_cshare_w1==.)	//keep the same sample
	*reghdfe lhs_cshare_w1 lntvs if sample1a==1, absorb(i.year)
	reghdfe lhs_cshare_w1 lntvs if sample1a==1, absorb(`fe_cell')
	reghdfe lhs_cshare_w1 lntvs if sample2a==1, absorb(`fe_firm')


*** Industry-specific estimates
* the two baseline specifications, run separately for each sector
foreach sector of numlist 311/316 321/327 331/337 339 {
	display `sector'
	reghdfe lhs_cshare lntvs if sample1a_`sector'==1, absorb(`fe_cell')
	reghdfe lhs_cshare lntvs if sample2a_`sector'==1, absorb(`fe_firm')
}

*** More flexible variation based on bin plot
* residual of the markup after absorbing industry-year effects, then its mean
* together with the mean log sales share in each sales-share bin
reghdfe mp_cshare, absorb(`fe_cell') res
predict mp_cshare_clean, residuals

tabstat ln_mkt_sh mp_cshare_clean if sample1==1, by(bin_ln_mkt_sh) statistics(mean)

********************************************************************************
********************************************************************************
***  III. DISCLOSURE ANALYSIS
********************************************************************************
********************************************************************************
* One pass per key variable. For every table the request is described through the
* globals dummies, spreadsheet, key and firm, any earlier output file of the same
* name is removed, and discstats.do is run with the table title as its argument.
* NOTE(review): how discstats.do uses these globals is not visible here - verify there.
local varlist "sw sw_nl l0"
*local varlist "sw sw_nl l0 tvs"
foreach var of local varlist {
	display "`var'"

	*** Tab 1: Aggregate markup
	local tablename "tab1"
	global key "`var'"
	global firm "firmid"  
	global spreadsheet "discstats_feb_2020_`tablename'_`var'"
	global dummies "sample1 sample1_1972 sample1_1977 sample1_1982 sample1_1987 sample1_1992 sample1_1997 sample1_2002 sample1_2007 sample1_2012"
	capture !rm "$spreadsheet.dta" 
	do "/projects/disclosure/discstats.do" "Aggregate markup"

	*** Tab 2: Baseline Regression
	local tablename "tab2"
	global key "`var'"
	global firm "firmid"  
	global spreadsheet "discstats_feb_2020_`tablename'_`var'"
	global dummies "sample1a sample2a sample1b sample2b"
	capture !rm "$spreadsheet.dta" 
	do "/projects/disclosure/discstats.do" "Baseline Regression"

	*** Tab 3: Sectoral Regression
	local tablename "tab3"
	global key "`var'"
	global firm "firmid"  
	global spreadsheet "discstats_feb_2020_`tablename'_`var'"
#delimit;
global dummies "sample1a_311 sample1a_312 sample1a_313 sample1a_314 sample1a_315 sample1a_316 
		sample1a_321 sample1a_322 sample1a_323 sample1a_324 sample1a_325 sample1a_326 sample1a_327
		sample1a_331 sample1a_332 sample1a_333 sample1a_334 sample1a_335 sample1a_336 sample1a_337 sample1a_339
		
		sample2a_311 sample2a_312 sample2a_313 sample2a_314 sample2a_315 sample2a_316 
		sample2a_321 sample2a_322 sample2a_323 sample2a_324 sample2a_325 sample2a_326 sample2a_327
		sample2a_331 sample2a_332 sample2a_333 sample2a_334 sample2a_335 sample2a_336 sample2a_337 sample2a_339
		
		sample2b_311 sample2b_312 sample2b_313 sample2b_314 sample2b_315 sample2b_316 
		sample2b_321 sample2b_322 sample2b_323 sample2b_324 sample2b_325 sample2b_326 sample2b_327
		sample2b_331 sample2b_332 sample2b_333 sample2b_334 sample2b_335 sample2b_336 sample2b_337 sample2b_339
		";

#delimit cr
	capture !rm "$spreadsheet.dta" 
	do "/projects/disclosure/discstats.do" "Sectoral Regression"

	*** Tab 4: Markup Patterns
	local tablename "tab4"
	global key "`var'"
	global firm "firmid"  
	global spreadsheet "discstats_feb_2020_`tablename'_`var'"
	global dummies "sample1_bin1 sample1_bin2 sample1_bin3 sample1_bin4 sample1_bin5 sample1_bin6 sample1_bin7 sample1_bin8 sample1_bin9 sample1_bin10"
	capture !rm "$spreadsheet.dta" 
	do "/projects/disclosure/discstats.do" "Markup Patterns"

}

* close the log opened at the top of the file
log close 
